
#################################################################################################################################
#                                   Calculating Relevant Variables for Regression Analyses 
#                                   and Recreating All Tables and Figures in the Paper
#
# The first part of this script loads the datasets generated by 'cleaning_data3.R'. 
# Using 'cleaning_data3.R', we created:
# - Cleaned data where each observation represents an account, stock, and date:
#   - `data_peak_prices_cleaned.csv`
#
# - Cross-sectional data where each observation represents an account:
#   - `data_peak_cross_sec_acc.csv`
#
# - Peak prices data where the peak is the highest price each investor has experienced since the purchase:
#   - `MAXC_peak_5update.csv` (using a minimum of 5 business days to update the peak)
#   - `MAXC_peak_20update.csv` (using a minimum of 20 business days to update the peak)
#
# - Peak prices determined within a one-year window:
#   - `peak_past_year_5update.csv` (using a minimum of 5 business days to update the peak)
#   - `peak_past_year_20update.csv` (using a minimum of 20 business days to update the peak)
#
# The second part of this script executes regressions and recreates all tables and figures presented in the paper.
#
#################################################################################################################################





# Start from an empty workspace (kept from the original script).
rm(list = ls(all=TRUE))
library(data.table)

# memory.limit() only does something on Windows builds of R older than 4.2;
# on other platforms and on newer R it is a stub that merely warns, so the
# calls are skipped there.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  print(memory.limit(size = 10000000))
  print(memory.limit())
}

Sys.setenv(LANG = "en")

# English month names are required further down, where dates are parsed with
# the "%d%B%Y" format. "us" is a Windows locale name; on other systems the "C"
# locale provides English month names.
Sys.setlocale("LC_TIME", if (.Platform$OS.type == "windows") "us" else "C")

library(zoo)
library(statar)
library(dplyr)
library(lubridate)
library(dplyr)
library(Rmisc)
library(gmodels)
library(ggplot2)  
library(xtable)


# Locations of the output tables, output figures, input data, and code
wd_tables <- "d:/settings/beset/Desktop/GITHUB/peak_prices/tables/"
wd_figures <- "d:/settings/beset/Desktop/GITHUB/peak_prices/figures/"
wd_peak <- "D:/Peak_prices_19_data/"
wd_code <- "d:/settings/beset/Desktop/GITHUB/peak_prices"

# Read the cleaned data from the data directory
setwd(wd_peak)
data.and.login.date.m <- fread("data_peak_prices_cleaned.csv")

# Remove these columns from the loaded table
columns_to_remove <- c(
  "date", "missing_pur.price", "upp", "af",
  "raw_sell", "raw_buy",
  "unit_price", "B_asset_class",
  "currency", "DS_STOCK_TYPE",
  "DS_SECURITY_TYPE", "DS_GEOGRAPHY_GROUP_NAME", "new_ac", "INDUSTRY_GROUP2_NEW"
)
data.and.login.date.m[, (columns_to_remove) := NULL]

# Convert the three date columns with the same "%Y-%m-%d" format
date_columns <- c("port_date", "pur_day", "last_td")
data.and.login.date.m[, (date_columns) := lapply(.SD, as.Date, "%Y-%m-%d"),
                      .SDcols = date_columns]

# The script "prices_last_login.R" calculates the prices on the most recent
# login days and merges them with the existing data in `data.and.login.date.m`.
# It is sourced with the code directory as the working directory.
setwd(wd_code)
source("prices_last_login.R")

# Back to the data directory for the reads that follow
setwd(wd_peak)



##################################################
# Reading Peak Data and Selecting Files
# -------------------------------------
# This section reads the peak data files:
# - "MAXC_peak_5update.csv"
# - "MAXC_peak_20update.csv"
#
# Additionally, the loop runs the analysis on both login days and sell days.
##################################################


# Peak files to process (first index of the loop below)
list_data_name_read <- c("MAXC_peak_5update.csv", "MAXC_peak_20update.csv")

# Samples to process (second index of the loop below)
list_days <- c("login", "sell")


for (data_number in 1:2){
  for (days_number in 1:2){
    
    # Pick the peak file and the sample for this pass of the loop
    data_name_read <- list_data_name_read[data_number]
    use_days <- list_days[days_number]
    print(data_name_read)
    print(use_days)

    setwd(wd_peak)
    DT_peak_or_min <- fread(data_name_read)
    DT_peak_or_min[, port_date := as.Date(port_date, "%Y-%m-%d")]

    # Each peak file stores the peak price, the date of the peak, and the
    # dummy for the current price surpassing the past peak under
    # file-specific names. They are renamed to the common names
    # `app_past_point`, `date_past_point`, and `pass_point_dummy`.
    # Unrecognised file names leave the table and the labels untouched.
    peak_file_spec <- switch(
      data_name_read,
      "MAXC_peak_5update.csv" = list(
        old_names = c("app_past_peak_MAXC_up5", "date_past_peak_MAXC_up5", "pass_peak_MAXC_up5"),
        plot_name = "MAXC_peak_5update",
        period_name = "Week"
      ),
      "MAXC_peak_20update.csv" = list(
        old_names = c("app_past_peak_MAXC_up20", "date_past_peak_MAXC_up20", "pass_peak_MAXC_up20"),
        plot_name = "MAXC_peak_20update",
        period_name = "Month"
      ),
      NULL
    )

    if (!is.null(peak_file_spec)) {
      setnames(DT_peak_or_min,
               old = peak_file_spec$old_names,
               new = c("app_past_point", "date_past_point", "pass_point_dummy"))
      plot_name <- peak_file_spec$plot_name
      period_name <- peak_file_spec$period_name
    }

    # Left-join the peak columns onto the main panel by account, stock and date
    peak_columns <- c("Code_used_DS", "port_date", "anon",
                      "app_past_point", "date_past_point", "pass_point_dummy")
    data_for_peaks <- merge(data.and.login.date.m,
                            DT_peak_or_min[, ..peak_columns],
                            by = c("anon", "Code_used_DS", "port_date"),
                            all.x = TRUE)

    # Drop the reference to the peak table once it has been merged
    DT_peak_or_min <- NULL

    
    # Per account: number of distinct dates with a sale or a purchase
    total_number_transactions <- unique(
      data_for_peaks[sell == 1 | buy == 1, .(port_date, anon)]
    )[, .(total_number_transactions = .N), by = .(anon)]

    # Per account: number of distinct dates present in the panel
    total_number_logins <- unique(
      data_for_peaks[, .(port_date, anon)]
    )[, .(total_login = .N), by = .(anon)]

    # Ratio of the two counts, attached to every row of the account
    ratio <- merge(total_number_transactions, total_number_logins,
                   by = "anon", all.x = TRUE)
    ratio[, ratio_tr_login := total_number_transactions / total_login]
    data_for_peaks <- merge(data_for_peaks, ratio, by = "anon", all.x = TRUE)
    
    # Returns relative to the past peak
    # ---------------------------------
    # Columns created (only on rows where the peak price is available):
    # - `return.since.point`       : (app - app_past_point) / app_past_point
    # - `return.since.point100`    : the same return multiplied by 100
    # - `gain.since.point`         : 1 when the return is positive, 0 otherwise
    # - `loss.since.point`         : 1 when the return is negative, 0 otherwise
    # - `return.since.point_pos100`: return * 100 when positive, 0 otherwise
    # - `return.since.point_neg100`: return * 100 when zero or negative, 0 otherwise

    data_for_peaks[, date_past_point := as.Date(date_past_point, "%Y-%m-%d")]

    data_for_peaks[!is.na(app_past_point),
                   return.since.point := (app - app_past_point) / app_past_point]
    data_for_peaks[!is.na(app_past_point), ":="(
      return.since.point100 = return.since.point * 100,
      gain.since.point = as.numeric(return.since.point > 0),
      loss.since.point = as.numeric(return.since.point < 0)
    )]

    # Positive and non-positive parts of the return, in percent
    data_for_peaks[!is.na(return.since.point), ":="(
      return.since.point_pos100 = pmax(return.since.point, 0) * 100,
      return.since.point_neg100 = pmin(return.since.point, 0) * 100
    )]
    

    # Flag cases where there is no gain since purchase but a gain since the last peak.
    # These are situations where an investor makes a purchase at a high price,
    # causing the quantity-weighted average purchase price (QWAPP) to exceed a previous peak.
    # However, the peak can only be updated after a week, and it won't be updated if prices continue to rise.
    # This results in an apparent gain since the peak, but a loss since purchase when using the QWAPP.

    data_for_peaks[gain.since.pur == 0 & gain.since.point == 1, flag_no_peak := 1]

    # Check the prevalence of these flagged cases (less than 1% of observations).
    # Values are not auto-printed inside a for loop, so the count table is
    # printed explicitly.
    print(data_for_peaks[, .N, by = .(flag_no_peak)])
    

    # Bin means for the binned scatterplots
    # -------------------------------------
    # Unflagged rows are assigned to `nbins` quantile bins of each return
    # measure, and every row then receives the mean percentage return of
    # its bin.

    nbins <- 100

    # Bins of the return since the peak
    data_for_peaks[is.na(flag_no_peak),
                   percentiles_point := xtile(return.since.point, n = nbins)]
    data_for_peaks[is.na(flag_no_peak),
                   percentiles_mean_point := mean(return.since.point100),
                   by = .(percentiles_point)]

    # Bins of the return since purchase
    data_for_peaks[is.na(flag_no_peak),
                   percentiles := xtile(return.since.pur, n = nbins)]
    data_for_peaks[is.na(flag_no_peak),
                   percentiles_mean := mean(return.since.pur100),
                   by = .(percentiles)]
    

    # Outlier trimming on the two return measures

    # Positive and non-positive parts of the return since purchase, in percent
    data_for_peaks[!is.na(return.since.pur), ":="(
      return.since.pur_pos100 = pmax(return.since.pur, 0) * 100,
      return.since.pur_neg100 = pmin(return.since.pur, 0) * 100
    )]

    # Keep only the rows that were not flagged
    data_for_peaks <- data_for_peaks[is.na(flag_no_peak)]

    # Detailed summary of every column whose name contains "return"
    return_columns <- grep("return", names(data_for_peaks), value = TRUE)
    return_sum <- sum_up(data_for_peaks[, return_columns, with = FALSE], d = TRUE)
    return_sum_dt <- as.data.table(return_sum)

    # 1st and 99th percentiles of each return measure serve as cut-offs
    cut.out.return <- return_sum_dt[Variable == "return.since.pur100", .(p1, p99)]
    cut.out.return.point <- return_sum_dt[Variable == "return.since.point100", .(p1, p99)]

    # A row is an outlier when either return lies outside its cut-offs
    data_for_peaks[, outlier := 0]
    data_for_peaks[return.since.pur100 > cut.out.return$p99 |
                     return.since.pur100 < cut.out.return$p1,
                   outlier := 1]
    data_for_peaks[return.since.point100 > cut.out.return.point$p99 |
                     return.since.point100 < cut.out.return.point$p1,
                   outlier := 1]

    # Drop the outliers and the rows where either return is missing
    data_for_peaks <- data_for_peaks[outlier == 0 &
                                       !is.na(return.since.point) &
                                       !is.na(return.since.pur)]
    

    # Label used when building the table captions and row headings
    label_table <- "Peak"

    # Accounts with at least one row on a day with sales, and their row counts
    anon_in_sample_after_outliers <- data_for_peaks[num_sales_the_day > 0, .N, by = .(anon)]

    # Keep only the rows of those accounts
    data_for_peaks <- data_for_peaks[anon %in% anon_in_sample_after_outliers$anon]
    
    # Merging with FTSE100 Data
    # Load FTSE100 return data
    ftse100 <- fread("D:/files_moved_19/output datastream/datastream data/ftse100 return.csv")

    library(lubridate)
    # "%B" matches full month names in the current LC_TIME locale, so this
    # parse silently returns NA when the locale's month names do not match
    # the ones used in the file.
    ftse100[, port_date := as.Date(valuedate_c, "%d%B%Y")]
    if (anyNA(ftse100$port_date)) {
      warning("Some FTSE100 dates could not be parsed with format '%d%B%Y'; ",
              "check the LC_TIME locale.", call. = FALSE)
    }

    ftse100[, valuedate_c := NULL]
    ftse100 <- ftse100[order(port_date)]

    # FTSE100 return of the previous row (previous date after sorting).
    # shift() gives the same lag as prepending NA and dropping the last value,
    # and it also works when the table has zero rows or a single row.
    ftse100[, return_FTSE100_yest := shift(return_FTSE100, n = 1L, type = "lag")]

    # Merge FTSE100 data with peaks data
    data_for_peaks <- merge(data_for_peaks, ftse100, by = "port_date", all.x = TRUE)

    # Number of days between the portfolio date and the purchase date / peak date
    data_for_peaks[, distance_pur := as.numeric(port_date - pur_day)]
    data_for_peaks[, distance_peak := as.numeric(port_date - date_past_point)]

    # Sample Selection: Sell Days vs. Login Days
    # The sell sample keeps only rows from days with at least one sale.
    if (use_days == "sell") {
      data_for_peaks <- data_for_peaks[num_sales_the_day > 0]
    }
    
  
    ##############################
    # The script "excess_returns.R" analyzes excess returns and disposition effects using data from "MAXC_peak_5update.csv" 
    # for the sell day sample.
    #
    # Part 1: Excess Returns Calculation
    # ----------------------------------
    # Calculates excess post-sale returns relative to FTSE100 over three periods (1 month, 166 days, 1 year) for realized gains and unrealized losses.
    #
    # Output Tables:
    # - **Table A43:** Average Returns 
    # - **Table 11:** Ex Post Returns 
    # - **Table A44:** Returns by Frequency of Peaks 
    #
    # Part 2: Volatility Analysis
    # ---------------------------
    # Examines returns based on asset volatility.
    #
    # Output Table:
    # - **Table A45:** Ex Post Returns by Volatility 
    #
    # Part 3: Robustness Checks
    # -------------------------
    # 
    # Analysis of the purchase and peak price disposition effects, splitting the sample into ten deciles based on returns since purchase.
    #
    # Output Table:
    # - **Table 6:** Disposition Effects by Decile 
    #
    # Tests investor responses to various gain measures.
    #
    # Output Table:
    # - **Table 8:** Controlling for Alternative Reference Points 
    #
    # Part 4: Timing of Peaks
    # -----------------------
    # Investigates how the timing of peaks influences selling decisions.
    
    # Output Table:
    # - **Table 9:** Impact of Peak Timing 
    ##############################
    
    
    # Switch to the code directory so the script below is found by its
    # relative file name.
    setwd(wd_code)    
    
    # source() evaluates the script in the global environment by default, so
    # it works on the objects built above (e.g. `data_for_peaks`).
    # NOTE(review): this line runs on every pass of the loop, while the header
    # above describes the script as using "MAXC_peak_5update.csv" for the sell
    # day sample -- presumably the script restricts itself; verify.
    source("excess_returns.R")

    
    ##############################
    # PLOTTING PATTERNS OF THE DISPOSITION EFFECT
    # -------------------------------------------
    # These figures illustrate the probability of stock sales as a function of returns since purchase and returns since peak price.
    # The panels display binscatter plots showing the relationship between the probability of sale and:
    # - Returns since purchase (Panel A)
    # - Returns since peak price (Panel B)
    # - The interaction between returns since purchase and returns since peak price (Panel C)
    #
    # The analysis is conducted for different definitions of peaks (updated after a week or a month) 
    # and for both the login day sample and the sell day sample.
    
    # **Figure 3: Probability of Stock Sale, Returns Since Purchase, and Returns Since Peak**
    # - `figures/patter_DE_purch_MAXC_peak_5update_sellsample.pdf`
    # - `figures/patter_DE_since_MAXC_peak_5update_sellsample.pdf`
    # - `figures/patter_DE_interaction_MAXC_peak_5update_sellsample.pdf`
    
    # **Figure A16: Probability of Stock Sale, Returns Since Purchase, and Returns Since Peak (Month Definition)**
    # - `figures/patter_DE_purch_MAXC_peak_20update_sellsample.pdf`
    # - `figures/patter_DE_since_MAXC_peak_20update_sellsample.pdf`
    # - `figures/patter_DE_interaction_MAXC_peak_20update_sellsample.pdf`
    
    # **Figure A18: Probability of Stock Sale, Returns Since Purchase, and Returns Since Peak in the Login-Day Sample**
    # - `figures/patter_DE_purch_MAXC_peak_5update_loginsample.pdf`
    # - `figures/patter_DE_since_MAXC_peak_5update_loginsample.pdf`
    # - `figures/patter_DE_interaction_MAXC_peak_5update_loginsample.pdf`
    
    # Additionally, we reproduce the analysis with a triple interaction between gains since purchase, 
    # gains since the past peak price, and gains since the most recent login. This is documented 
    # in Quispe-Torreblanca et al. (2024) and presented in Figure A19 of the paper.
    
    # **Figure A19: Probability of Stock Sale, Returns Since Purchase, Returns Since Peak, and Returns Since Last Login Day**
    # - `figures/patter_DE_triple_interaction_MAXC_peak_5update_sellsample.pdf`
    ##############################
    
    plots <- 1
    if (plots == 1) {

      # Share of sales with a binomial confidence interval for every
      # combination of the grouping columns. ci.binom() is evaluated once per
      # group, and rows are ordered by the grouping columns with NA groups last.
      summarise_sale_prob <- function(dt, group_vars) {
        out <- dt[, {
          ci <- ci.binom(sell)
          list(avg_PctPasses = ci[1], lci_PctPasses = ci[2], uci_PctPasses = ci[3])
        }, by = group_vars]
        setorderv(out, group_vars, na.last = TRUE)
        out
      }

      # Theme and x axis shared by the three panels; fresh objects are built
      # on every call so no scale object is shared between plots.
      sale_prob_layout <- function() {
        list(
          theme_bw(),
          theme_classic(),
          theme(text = element_text(size = 19, family = "serif"),
                legend.position = "bottom",
                legend.title.align = .5,
                legend.text = element_text(size = 19),
                panel.border = element_blank(),
                panel.grid = element_blank(),
                panel.grid.minor = element_blank()),
          scale_x_continuous(breaks = seq(-100, 100, 10), limits = c(-50, 50))
        )
      }

      # --- Binned sale probabilities ------------------------------------------
      # NOTE(review): the lim.* values are not used in this block; they are kept
      # because later or sourced code may read them -- confirm before removing.

      # By bin of the return since peak
      variable <- "percentiles_mean_point"
      data.plot <- summarise_sale_prob(data_for_peaks[is.na(flag_no_peak)], variable)

      lim.min <- data.plot[percentiles_mean_point > -50 & percentiles_mean_point < 50,
                           min(lci_PctPasses)]
      lim.max <- data.plot[percentiles_mean_point > -50 & percentiles_mean_point < 50,
                           max(uci_PctPasses)]

      # By bin of the return since purchase and gain-since-peak status
      variable <- list("percentiles_mean", "gain.since.point")
      data.plot2 <- summarise_sale_prob(data_for_peaks[is.na(flag_no_peak)], unlist(variable))
      data.plot2 <- data.plot2[!is.na(gain.since.point)]

      lim.min2 <- data.plot2[percentiles_mean > -50 & percentiles_mean < 50, min(lci_PctPasses)]
      lim.max2 <- data.plot2[percentiles_mean > -50 & percentiles_mean < 50, max(uci_PctPasses)]
      lim.min.y <- min(lim.min, lim.min2) * 0.8
      lim.max.y <- max(lim.max, lim.max2) * 1.2

      # By bin of the return since purchase
      variable <- list("percentiles_mean")
      data.plot3 <- summarise_sale_prob(data_for_peaks[is.na(flag_no_peak)], unlist(variable))

      lim.min3 <- data.plot3[percentiles_mean > -50 & percentiles_mean < 50, min(lci_PctPasses)]
      lim.max3 <- data.plot3[percentiles_mean > -50 & percentiles_mean < 50, max(uci_PctPasses)]

      library(ggthemes)

      # --- Probability of sale and returns since peak (Panel B) ----------------
      xlab_use <- "Return Since Peak (%)  \n \n"
      plot_saved <- ggplot(
        data.plot[percentiles_mean_point > -50 & percentiles_mean_point < 50][avg_PctPasses != 0],
        aes(x = percentiles_mean_point,
            y = avg_PctPasses,
            ymin = lci_PctPasses, ymax = uci_PctPasses)
      ) +
        geom_point(colour = "black") +
        geom_linerange(colour = "black") +
        xlab(xlab_use) +
        ylab("Probability of Selling Stock") +
        sale_prob_layout()

      setwd(wd_figures)
      name_plot <- paste0("patter_DE_since_", plot_name, "_", use_days, "sample.pdf")
      # The plot is passed explicitly instead of relying on last_plot()
      ggsave(name_plot, plot = plot_saved, width = 10, height = 5)

      # --- Interaction between returns since purchase and returns since peak
      #     price (Panel C) ------------------------------------------------------
      lab_use <- "Return Since Peak"
      data.plot2[, outCI := 0]

      # The lowest return-since-purchase bin that contains gains since the peak
      # is left out of the figure (for both gain groups).
      exclude <- data.plot2[gain.since.point == 1, min(percentiles_mean)]
      data.plot2[percentiles_mean == exclude, outCI := 1]

      plot_interaction <- ggplot(
        data.plot2[percentiles_mean > -50 & percentiles_mean < 50 & outCI == 0],
        aes(x = percentiles_mean,
            y = avg_PctPasses,
            col = as.factor(gain.since.point))
      ) +
        geom_linerange(aes(ymin = lci_PctPasses, ymax = uci_PctPasses,
                           col = as.factor(gain.since.point))) +
        geom_point() +
        xlab("Return Since Purchase (%)") +
        ylab("Probability of Selling Stock") +
        sale_prob_layout() +
        scale_color_manual(
          name = lab_use, breaks = c("0", "1"), labels = c("Loss", "Gain"),
          values = c("lightslategray", "black", "lightslategray", "gray59")
        ) +
        theme(legend.position = "bottom")

      setwd(wd_figures)
      name_plot <- paste0("patter_DE_interaction_", plot_name, "_", use_days, "sample.pdf")
      ggsave(name_plot, plot = plot_interaction, width = 10, height = 5)

      # --- Probability of sale and returns since purchase (Panel A) ------------
      xlab_use <- "Return Since Purchase (%)"
      plot_saved <- ggplot(
        data.plot3[percentiles_mean > -50 & percentiles_mean < 50][avg_PctPasses != 0],
        aes(x = percentiles_mean,
            y = avg_PctPasses,
            ymin = lci_PctPasses, ymax = uci_PctPasses)
      ) +
        geom_point(colour = "black") +
        geom_linerange(colour = "black") +
        xlab(xlab_use) +
        ylab("Probability of Selling Stock") +
        sale_prob_layout()

      setwd(wd_figures)
      name_plot <- paste0("patter_DE_purch_", plot_name, "_", use_days, "sample.pdf")
      ggsave(name_plot, plot = plot_saved, width = 10, height = 5)

    }
    
    plots_triple <- 1
    if (plots_triple == 1) {

      # Probability of Stock Sale, Returns Since Purchase, Returns Since Peak,
      # and Returns Since Last Login Day

      # Share of sales with a binomial confidence interval for every
      # combination of the grouping columns. ci.binom() is evaluated once per
      # group, and rows are ordered by the grouping columns with NA groups last.
      summarise_sale_prob <- function(dt, group_vars) {
        out <- dt[, {
          ci <- ci.binom(sell)
          list(avg_PctPasses = ci[1], lci_PctPasses = ci[2], uci_PctPasses = ci[3])
        }, by = group_vars]
        setorderv(out, group_vars, na.last = TRUE)
        out
      }

      # Gain since the most recent login: 1 when the current price is above the
      # price at the last login, 0 when it is equal or below; rows where the
      # comparison is NA keep a missing value.
      data_for_peaks[app > app_last_login, gain.since.login := 1]
      data_for_peaks[app <= app_last_login, gain.since.login := 0]

      variable <- list("percentiles_mean", "gain.since.point", "gain.since.login")
      data.plot2 <- summarise_sale_prob(data_for_peaks[is.na(flag_no_peak)], unlist(variable))
      data.plot2 <- data.plot2[!is.na(gain.since.point)]

      lab_use <- "Return Since Peak"
      data.plot2[, outCI := 0]

      # The lowest return-since-purchase bin that contains gains since the peak
      # is left out of the figure.
      exclude <- data.plot2[gain.since.point == 1, min(percentiles_mean)]
      data.plot2[, gain.since.login := factor(gain.since.login, levels = c(0, 1),
                                              labels = c("Gain Since Last Login=0", "Gain Since Last Login=1"))]
      data.plot2[percentiles_mean == exclude, outCI := 1]

      plot_triple <- ggplot(
        data.plot2[percentiles_mean > -50 & percentiles_mean < 50 & outCI == 0 & !is.na(gain.since.login)],
        aes(x = percentiles_mean,
            y = avg_PctPasses,
            col = as.factor(gain.since.point))
      ) +
        geom_linerange(aes(ymin = lci_PctPasses, ymax = uci_PctPasses,
                           col = as.factor(gain.since.point))) +
        geom_point() +
        theme_bw() + theme_classic() +
        xlab("Return Since Purchase (%)") +
        ylab("Probability of Selling Stock") +
        theme(text = element_text(size = 19, family = "serif"),
              legend.position = "bottom",
              legend.title.align = .5,
              legend.text = element_text(size = 19),
              panel.border = element_blank(),
              panel.grid = element_blank(),
              panel.grid.minor = element_blank()) +
        scale_x_continuous(breaks = seq(-100, 100, 10), limits = c(-50, 50)) +
        scale_color_manual(
          name = lab_use, breaks = c("0", "1"), labels = c("Loss", "Gain"),
          values = c("lightslategray", "black", "lightslategray", "gray59")
        ) +
        # One panel per value of the gain-since-last-login indicator
        facet_grid(. ~ gain.since.login) +
        theme(legend.position = "bottom")

      setwd(wd_figures)

      name_plot <- paste0("patter_DE_triple_interaction_", plot_name, "_", use_days, "sample.pdf")
      # The plot is passed explicitly instead of relying on last_plot()
      ggsave(name_plot, plot = plot_triple, width = 10, height = 5)

    }
    
    

    ##############################
    # Script "hist_plots.R" computes several histograms of returns.
    #
    # **Figure A13: Stock Returns Since Purchase and Returns Since Peak (Week Definition)**
    # - `figures/hist_retsincepur_MAXC_peak_5update_sellsample.pdf`
    # - `figures/hist_retsincepeak_MAXC_peak_5update_sellsample.pdf`
    #
    # **Figure A15: Stock Returns Since Purchase and Returns Since Peak (Month Definition)**
    # - `figures/hist_retsincepur_MAXC_peak_20update_sellsample.pdf`
    # - `figures/hist_retsincepeak_MAXC_peak_20update_sellsample.pdf`
    #
    # **Figure A17: Stock Returns Since Purchase and Returns Since Peak, Login-Day Sample**
    # - `figures/hist_retsincepur_MAXC_peak_5update_loginsample.pdf`
    # - `figures/hist_retsincepeak_MAXC_peak_5update_loginsample.pdf`
    ##############################
    
    # Switch for the histogram script; set to anything other than 1 to skip it
    histplots <- 1
    if (histplots == 1) {
      # The script is looked up by relative name in the code directory
      setwd(wd_code)
      source("hist_plots.R")
    }
    
    
    
    
    
    
    # Switch for the summary-statistics table of returns
    stats_run <- 1
    if (stats_run == 1) {

      library(Hmisc)
      library(stargazer)
      library(lfe)

      ##############################
      # Summary Statistics of Returns
      # -----------------------------
      # Mean, standard deviation and median of the returns since purchase and
      # since the peak price, together with the corresponding gain dummies.

      # **Table A10: Summary Statistics for Stock Returns Since Purchase and Returns Since Peak**
      # - `tables/clean_summary_stats_returns_MAXC_peak_5update_sellsample.tex`

      # **Table A13: Summary Statistics for Stock Returns Since Purchase and Returns Since Peak (Month Definition)**
      # - `tables/clean_summary_stats_returns_MAXC_peak_20update_sellsample.tex`
      ##############################

      # Columns summarised, in the order of the row labels below
      names_for_stat <- c("return.since.pur100", "gain.since.pur",
                          "return.since.point100", "gain.since.point")

      library(stargazer)
      library(starpolishr)
      library(magrittr)

      table <- stargazer(
        data_for_peaks[, names_for_stat, with = FALSE],
        covariate.labels = c(
          "\\hspace{0.2cm} Return Since Purchase $(\\%)$",
          "\\hspace{0.2cm} Gain Since Purchase=1",
          paste0("\\hspace{0.2cm} Returns Since ", label_table, " $(\\%)$"),
          paste0("\\hspace{0.2cm} Gain Since ", label_table, "=1")
        ),
        summary.stat = c("mean", "sd", "median"),
        omit.table.layout = "na",
        float = FALSE,
        table.layout = "-dc#-ats-n",
        style = "aer",
        column.sep.width = "-1pt"
      )

      # Number of rows in the sample, reported in the last line of the table
      d <- nrow(data_for_peaks)

      head <- paste0("\\emph{Return Since ", label_table, " } &  &  &  \\\\")

      # Two sub-headings and the observation count are inserted after the
      # 8th, 10th and 12th lines of the stargazer output
      rows_to_insert <- c(
        "\\emph{Return Since Purchase} &  &  &  \\\\",
        head,
        paste("N Investor $\\times$ Stock $\\times$ Day &", d, " &  &  \\\\", sep = " ", collapse = " ")
      )
      table1 <- star_insert_row(table, rows_to_insert, insert.after = c(8, 10, 12))

      setwd(wd_tables)

      # Only lines 9 to (last - 2) of the assembled table are written to disk
      write.table(
        table1[9:(length(table1) - 2)],
        file = paste0("clean_summary_stats_returns_", plot_name, "_", use_days, "sample.tex"),
        col.names = FALSE,
        row.names = FALSE,
        quote = FALSE
      )
    }
    
    
    ##############################
    # Summary Statistics for Account-Level Variables
    # ----------------------------------------------
    # This section computes account-level variables used for the summary statistics table in the paper.

    # **Table A6: Stockbroking Accounts Sample Summary Statistics**
    # - `tables/stats_account_MAXC_peak_5update_loginsample.tex`
    
    # Additionally, a shorter version is produced for inclusion in the beamer presentation.
    ##############################

    
    
    
    # The account-level table is produced once, for the 5-day peak file and the
    # login sample. Both operands are scalars, so the short-circuit `&&` is used.
    if (data_name_read == "MAXC_peak_5update.csv" && use_days == "login") {

      setwd(wd_peak)
      for_stat_acc <- fread("data_peak_cross_sec_acc.csv")
      # Keep only the accounts that are present in the analysis sample
      for_stat_acc <- for_stat_acc[anon %in% unique(data_for_peaks$anon)]

      for_stat_acc[, per_MF100 := per_MF * 100]
      names_for_stat <- c("female",
                          "age",
                          "account_tenure_years_to2016",
                          "port_val10000",
                          "MF_val10000",
                          "per_MF100",
                          "number_stock",
                          "months_w_logins100",
                          "months_w_tr100")

      library(xtable)
      for_table <- sum_up(for_stat_acc[, ..names_for_stat], d = TRUE)
      for_table <- as.data.table(for_table)[, .(Obs, Variable, Mean, Min, p25, p50, p75, Max)]

      # Sort the rows in the order in which the variables are listed above
      for_table[, order := match(Variable, names_for_stat)]
      for_table <- for_table[order(order)]

      # Only the mean is reported for the female dummy
      for_table[Variable == "female", ":="(Min = NA, p25 = NA, p50 = NA, p75 = NA, Max = NA)]

      # Two heading rows and a final row with the number of accounts are added.
      # The `Variable` text of these rows is dropped below; the printed labels
      # come from the row names.
      for_table_ready <- rbind(data.table(Variable = "Card Holder Characteristics"),
                               for_table[1:3],
                               data.table(Variable = "Account Holder Characteristics"),
                               for_table[4:9],
                               data.table(Variable = "N", Mean = unique(for_table[, .(Obs)])$Obs),
                               fill = TRUE)

      row.names(for_table_ready) <- c("\\emph{Account Holder Characteristics}",
                                      "\\hspace{0.3cm} Female", "\\hspace{0.3cm} Age (years)", "\\hspace{0.3cm} Account Tenure (years)",
                                      "\\emph{Account Characteristics}",
                                      "\\hspace{0.3cm} Portfolio Value (\\pounds10000)",
                                      "\\hspace{0.3cm} Investment in Mutual Funds (\\pounds10000)",
                                      "\\hspace{0.3cm} Investment in Mutual Funds (\\%)",
                                      "\\hspace{0.3cm} Number of Stocks",
                                      "\\hspace{0.3cm} Login days (\\% all days)",
                                      "\\hspace{0.3cm} Transaction days (\\% all market open days)",
                                      "N Accounts")
      for_table_ready[, ":="(Variable = NULL, order = NULL, Obs = NULL)]

      # Digits matrix: 3 decimals everywhere, none in the last row (the count)
      mdat <- matrix(0, nrow = nrow(for_table_ready), ncol = (ncol(for_table_ready) + 1))
      mdat[, ] <- 3
      mdat[12, ] <- 0
      options(xtable.sanitize.text.function = identity)
      options(xtable.sanitize.colnames.function = identity)

      # NOTE(review): `comment` is an argument of print.xtable(); passed to
      # xtable() it appears to have no effect. The fixed line positions used
      # below (1:12, 13:23, 9:...) presumably count on the two comment lines
      # being printed -- confirm before changing either.
      for_table_ready_latex <- xtable(for_table_ready, comment = FALSE, digits = mdat, align = "lcccccc")

      table <- capture.output(for_table_ready_latex)
      # A blank LaTeX row is inserted after the 12th printed line
      table <- c(table[1:12], "\\\\", table[13:23])
      setwd(wd_tables)
      # Lines 9 to (last - 3) are written. The original index
      # `12:length(table)-3` selected the same lines, but only because `:`
      # binds tighter than `-`.
      write.table(table[9:(length(table) - 3)],
                  file = paste0("stats_account_", plot_name, "_", use_days, "sample.tex"),
                  col.names = FALSE, row.names = FALSE, quote = FALSE)

      # a shorter version for the beamer

      for_table_ready[, ":="(Min = NULL, p25 = NULL, p75 = NULL, Max = NULL)]

      mdat <- matrix(0, nrow = nrow(for_table_ready), ncol = (ncol(for_table_ready) + 1))
      mdat[, ] <- 3
      mdat[12, ] <- 0

      options(xtable.sanitize.text.function = identity)
      options(xtable.sanitize.colnames.function = identity)

      for_table_ready_latex <- xtable(for_table_ready, comment = FALSE, digits = mdat)

      table <- capture.output(for_table_ready_latex)
      table <- c(table[1:12], "\\\\", table[13:23])
      setwd(wd_tables)
      write.table(table[9:(length(table) - 3)],
                  file = paste0("stats_account_", plot_name, "_", use_days, "sample_beamer.tex"),
                  col.names = FALSE, row.names = FALSE, quote = FALSE)

    }
    
    
    #########################################################################################################
    # 
    # Computing Regression Tables for the Paper
    # -----------------------------------------
    # This section computes the regression tables used in the paper for both the sell and login samples.
    # Please select the appropriate sample below before running the remaining code.
    
    # Note: the order of tables and figures in the code may differ from the sequence in the paper. 
    # The reorganization of the paper based on reviewer suggestions has resulted in some tables and figures being relocated to the appendix.
    #
    # **Table 5: Purchase and Peak Price Disposition Effect for Stocks: OLS Estimates**
    # - `tables/clean_OLS_MAXC_peak_5update_sellsample.tex`
    #
    # **Table A31: Purchase and Peak Price Disposition Effects for Stocks: Individual Fixed Effects Estimates**
    # - `tables/clean_FE_MAXC_peak_5update_sellsample.tex`
    #
    # **Table A32: Purchase and Peak Price Disposition Effects for Stocks: Including Continuous Returns Since Purchase and Since Peak Price**
    # - `tables/clean_OLS_returns_MAXC_peak_5update_sellsample.tex`
    #
    #########################################################################################################
    
    # Switch for the main regression tables (Table 5, Table A31, Table A32):
    # set to 1 to estimate the models and write the .tex files.
    regressions_run <- 1
    if (regressions_run == 1) {

      library(lfe)
      library(stargazer)

      # Every model is a linear regression of `sell` fitted with felm(). The
      # formula parts are: covariates | fixed effects | IV (none) | cluster
      # variables (anon and port_date). The summary() wrappers only matter when
      # the lines are run interactively.

      print("OLS")

      # Gain since purchase only
      summary(m1c <- felm(sell ~ gain.since.pur | 0 | 0 | anon + port_date,
                          data = data_for_peaks))

      # Gain since peak only
      summary(m2c <- felm(sell ~ gain.since.point | 0 | 0 | anon + port_date,
                          data = data_for_peaks))

      # Both gain dummies jointly
      summary(m3c <- felm(sell ~ gain.since.pur + gain.since.point | 0 | 0 | anon + port_date,
                          data = data_for_peaks))

      if (period_name == "Week") {

        # Same three specifications with account (anon) fixed effects.
        print("FE")

        summary(m1fec <- felm(sell ~ gain.since.pur | anon | 0 | anon + port_date,
                              data = data_for_peaks))

        summary(m2fec <- felm(sell ~ gain.since.point | anon | 0 | anon + port_date,
                              data = data_for_peaks))

        summary(m3fec <- felm(sell ~ gain.since.pur + gain.since.point | anon | 0 | anon + port_date,
                              data = data_for_peaks))
      }

      if (data_name_read == "MAXC_peak_5update.csv") {

        # Specifications that add the positive and negative return terms,
        # each estimated without and with account fixed effects.
        summary(n1c <- felm(sell ~ gain.since.pur + return.since.pur_pos100 + return.since.pur_neg100 |
                              0 | 0 | anon + port_date,
                            data = data_for_peaks))

        summary(n1fec <- felm(sell ~ gain.since.pur + return.since.pur_pos100 + return.since.pur_neg100 |
                                anon | 0 | anon + port_date,
                              data = data_for_peaks))

        summary(n2c <- felm(sell ~ gain.since.point + return.since.point_pos100 + return.since.point_neg100 |
                              0 | 0 | anon + port_date,
                            data = data_for_peaks))

        summary(n2fec <- felm(sell ~ gain.since.point + return.since.point_pos100 + return.since.point_neg100 |
                                anon | 0 | anon + port_date,
                              data = data_for_peaks))

        summary(n12c <- felm(sell ~ gain.since.pur + return.since.pur_pos100 + return.since.pur_neg100 +
                               gain.since.point + return.since.point_pos100 + return.since.point_neg100 |
                               0 | 0 | anon + port_date,
                             data = data_for_peaks))

        summary(n12fec <- felm(sell ~ gain.since.pur + return.since.pur_pos100 + return.since.pur_neg100 +
                                 gain.since.point + return.since.point_pos100 + return.since.point_neg100 |
                                 anon | 0 | anon + port_date,
                               data = data_for_peaks))
      }

      # Text fragments used to build the covariate labels of the tables.
      label_table <- "Peak"
      label_table_long <- "Gain Since Peak"

      # str_replace_all() is used to post-process every stargazer table below.
      library(stringr)

      if (data_name_read == "MAXC_peak_5update.csv") {

        # Table A32: Purchase and Peak Price Disposition Effects for Stocks:
        # Including Continuous Returns Since Purchase and Since Peak Price

        # Distinct coefficient names across the six models, first one dropped.
        names_of_IV <- unique(names(unlist(lapply(
          list(n1c, n2c, n12c, n1fec, n2fec, n12fec), coef
        ))))[-1]

        important_labels <- c(
          "Gain Since Purchase=1",
          "Return Since Purchase $>0$ $(\\%)$",
          "Return Since Purchase $<0$ $(\\%)$",
          paste0(label_table_long, "=1"),
          paste0("Returns Since ", label_table, " $>0$ $(\\%)$"),
          paste0("Returns Since ", label_table, " $<0$ $(\\%)$")
        )

        order_IV_omited <- c()
        table <- stargazer(
          n1c, n2c, n12c, n1fec, n2fec, n12fec,
          title = "",
          align = TRUE,
          type = "latex",
          dep.var.labels = c(""),
          covariate.labels = important_labels,
          omit = NULL,
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          omit.table.layout = "n",
          float = FALSE,
          table.layout = "-dc#-tas-",
          style = "aer",
          column.sep.width = "-1pt",
          add.lines = list(c("Account FE", "NO", "NO", "NO", "YES", "YES", "YES")),
          digits = 4
        )

        # Remove every caret from the LaTeX lines, then put the superscript
        # back into the R-squared label.
        table <- str_replace_all(table, "\\^", "")
        table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")
        # Write the lines without the first 9 and the last 2 of stargazer's output.
        setwd(wd_tables)
        write.table(table[10:(length(table) - 2)],
                    file = paste0("clean_OLS_returns_", plot_name, "_", use_days, "sample.tex"),
                    col.names = FALSE, row.names = FALSE, quote = FALSE)

      }

      # Table 5: Purchase and Peak Price Disposition Effect for Stocks: OLS Estimates

      names_of_IV <- unique(names(unlist(lapply(list(m1c, m2c, m3c), coef))))[-1]

      important_labels <- c(
        "Gain Since Purchase=1",
        paste0(label_table_long, "=1")
      )

      table <- stargazer(
        m1c, m2c, m3c,
        title = "",
        align = TRUE,
        type = "latex",
        dep.var.labels = c(""),
        covariate.labels = important_labels,
        omit = NULL,
        omit.stat = c("LL", "ser", "f", "adj.rsq"),
        omit.table.layout = "n",
        float = FALSE,
        table.layout = "-dc#-tas-",
        style = "aer",
        column.sep.width = "10pt",
        digits = 4
      )

      # Same clean-up as above: strip carets, restore R$^{2}$, trim the wrapper lines.
      table <- str_replace_all(table, "\\^", "")
      table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

      setwd(wd_tables)
      write.table(table[10:(length(table) - 2)],
                  file = paste0("clean_OLS_", plot_name, "_", use_days, "sample.tex"),
                  col.names = FALSE, row.names = FALSE, quote = FALSE)

      if (period_name == "Week") {

        # Table A31: Purchase and Peak Price Disposition Effects for Stocks:
        # Individual Fixed Effects Estimates (reuses the Table 5 labels)

        table <- stargazer(
          m1fec, m2fec, m3fec,
          title = "",
          align = TRUE,
          type = "latex",
          dep.var.labels = c(""),
          covariate.labels = important_labels,
          omit = NULL,
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          omit.table.layout = "n",
          float = FALSE,
          table.layout = "-dc#-tas-",
          style = "aer",
          column.sep.width = "10pt",
          digits = 4
        )

        table <- str_replace_all(table, "\\^", "")
        table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

        setwd(wd_tables)
        write.table(table[10:(length(table) - 2)],
                    file = paste0("clean_FE_", plot_name, "_", use_days, "sample.tex"),
                    col.names = FALSE, row.names = FALSE, quote = FALSE)

      }
    }
    
    
    #########################################################################################################
    # 
    # Analyzing Mechanisms
    # ---------------------
    # In this section, we analyze different mechanisms driving the results.
    # Various R scripts are called to perform these analyses.
    #
    # Using the script "mechanism.R", we run the following analyses:
    # 
    # Interactions with market performance
    # -------------------
    # We compute the results for Table A38 and Table A39. 
    # These tables present estimates from subsamples defined by market gains/losses since purchase 
    # and since the peak price event. The analysis splits the sample based on whether the market 
    # (FTSE100) experienced a loss or gain since the peak price event.
    #
    # - Table A38 focuses on cases where the market (FTSE100) was in loss since the purchase.
    # - Table A39 focuses on cases where the market (FTSE100) was in gain since the purchase.
    #
    # Rebalancing Test:
    # -----------------
    # We also test for rebalancing by restricting the dependent variable to indicate complete sales only 
    # (i.e., liquidation of positions), thereby excluding partial sales that might reflect portfolio rebalancing. 
    # The results are shown in Table A35.
    #
    # Output Tables:
    # --------------
    # **Table A38: Estimates of the Stocks Disposition Effect (Sub-samples by FTSE100 Returns Since Purchase, Week-Peak, Sell-Day Sample)**
    # - `tables/clean_ftse1_MAXC_peak_5update_sellsample.tex`
    #
    # **Table A39: Estimates of the Stocks Disposition Effect (Sub-samples by FTSE100 Returns Since Purchase, Week-Peak, Sell-Day Sample)**
    # - `tables/clean_ftse2_MAXC_peak_5update_sellsample.tex`
    #
    # **Table A35: Estimates of the Stocks Disposition Effect (Excluding Partial Sells)**
    # - `tables/clean_rebalancing_MAXC_peak_5update_sellsample.tex`
    #
    #########################################################################################################
    

    
    # Switch for the mechanism analyses; set to 1 to run them.
    mechanism_run <- 1
    # mechanism.R is sourced only for the week-peak definition read from
    # MAXC_peak_5update.csv.
    if (mechanism_run == 1 && period_name == "Week" &&
        data_name_read == "MAXC_peak_5update.csv") {

      setwd(wd_code)
      source("mechanism.R")
    }
    

    
    

    #########################################################################################################
    # 
    # Top-Up Purchases and the Peak Price Effect
    # ------------------------------------------
    # The script "top_up.R" explores the behavior of investors making additional purchases (top-ups) of stocks.
    # If investors believe that stocks that have declined since their peak price will eventually recover, 
    # they may be more inclined to top-up those stocks, especially when the losses since the peak are significant.
    
    # Figure 5 presents binned scatterplots illustrating the relationships between losses since purchase, 
    # losses since peak, and the likelihood of investors topping up their current positions with new purchases of the same stock.
    
    # Regression Analysis:
    # --------------------
    # - **Table A40:** Presents regression estimates for the likelihood of topping up a stock as a function of the return since purchase.
    # - **Table A41:** Presents regression estimates for the likelihood of topping up a stock as a function of the return since peak price.
    
    # Output Files:
    # -------------
    # **Figure 5: Stock Top-Ups, Returns Since Purchase and Returns Since Peak**
    # - `figures/TOPUP_basic_DE_purch_MAXC_peak_5update_loginsample.pdf`
    # - `figures/TOPUP_DE_since_MAXC_peak_5update_loginsample.pdf`
    
    # **Table A40: Stock Top-Up Behavior When Stocks Are in Loss Since Purchase (OLS and Fixed Effects Estimates)**
    # - `tables/clean_TOPUP_returns_since_purchase_MAXC_peak_5update_loginsample.tex`
    
    # **Table A41: Top-Up Behavior When Stocks Are in Loss Since Past Peak Price (OLS and Fixed Effects Estimates)**
    # - `tables/clean_TOPUP_returns_since_peak_MAXC_peak_5update_loginsample.tex`
    #
    #########################################################################################################

    # Switch for the top-up analysis; set to 1 to run it.
    topup_run <- 1

    # top_up.R is sourced only for the login-day sample with the week-peak
    # definition read from MAXC_peak_5update.csv.
    if (topup_run == 1 && period_name == "Week" && use_days == "login" &&
        data_name_read == "MAXC_peak_5update.csv") {

      setwd(wd_code)
      source("top_up.R")
    }
    
    #########################################################################################################
    #
    # Interaction Effects
    # ------------------------------------------------------
    # The following lines compute regression estimates to assess the disposition effect, testing the sensitivity 
    # of our main results to various market, account, and investor characteristics.
    # The estimates are generated through a loop that analyzes different subsamples.
    
    # **Table A36: Sub-Sample Analysis by Market Index Movements and Timing**
    # - This table presents estimates split by market index movements (e.g., FTSE100 up or down) and by the number 
    #   of days elapsed since purchase or since the peak price event. Each row reports coefficients for gains 
    #   since purchase and gains since peak, derived from separate regressions.
    
    # Output Files for Table A36:
    # - `tables/clean_ftse_up_sell.tex`: Subsample where the previous-day (t-1) FTSE100 return was positive.
    # - `tables/clean_ftse_down_sell.tex`: Subsample where the previous-day (t-1) FTSE100 return was negative.
    # - `tables/clean_days_since_peak_sell1.tex`: Subsample based on days since peak (below median).
    # - `tables/clean_days_since_peak_sell2.tex`: Subsample based on days since peak (above median).
    # - `tables/clean_days_since_purchase_sell1.tex`: Subsample based on days since purchase (below median).
    # - `tables/clean_days_since_purchase_sell2.tex`: Subsample based on days since purchase (above median).
    
    # **Table A37: Sub-Sample Analysis by Individual Characteristics**
    # - This table provides estimates by subsamples defined by demographic and account characteristics, including gender, 
    #   age, account tenure, portfolio value, and the number of stocks held.
    
    # Output Files for Table A37:
    # - `tables/clean_femalesell.tex`: Female investors.
    # - `tables/clean_malesell.tex`: Male investors.
    # - `tables/clean_age_sell1.tex`: Younger investors (below median).
    # - `tables/clean_age_sell2.tex`: Older investors (above median).
    # - `tables/clean_tenure_sell1.tex`: Investors with shorter account tenure (below median).
    # - `tables/clean_tenure_sell2.tex`: Investors with longer account tenure (above median).
    # - `tables/clean_PV_sell1.tex`: Investors with lower portfolio value (below median).
    # - `tables/clean_PV_sell2.tex`: Investors with higher portfolio value (above median).
    # - `tables/clean_stocks_sell1.tex`: Investors holding fewer stocks (below median).
    # - `tables/clean_stocks_sell2.tex`: Investors holding more stocks (above median).
    #########################################################################################################
    
    
    
    
    
    
    # 0/1 indicator for female account holders (gender == "F").
    data_for_peaks[, female := ifelse(gender == "F", 1, 0)]

    # Switch for the sub-sample regressions (Tables A36 and A37); set to 1 to run.
    regressions_split_run <- 1
    if (regressions_split_run == 1 && data_name_read == "MAXC_peak_5update.csv") {

      # Detailed summary statistics of the split variables; the p50 column
      # supplies the medians used for the below/above-median sub-samples.
      cut <- as.data.table(sum_up(
        data_for_peaks[, .(distance_pur, distance_peak, portfolio_value_all_stocks,
                           account_tenure_years_to2016, age, Count_sedols)],
        d = TRUE
      ))

      # Median (p50) of one split variable, looked up in `cut`.
      p50_of <- function(var_name) cut[Variable == var_name]$p50

      # One output file per sub-sample. The position in this vector selects the
      # sub-sample in the loop below, so the order matters.
      file_stems <- c("ftse_up_", "ftse_down_",
                      "days_since_peak_", "days_since_peak_",
                      "days_since_purchase_", "days_since_purchase_",
                      "age_", "age_",
                      "female", "male",
                      "PV_", "PV_",
                      "tenure_", "tenure_",
                      "stocks_", "stocks_")
      file_tails <- c("", "",
                      "1", "2",
                      "1", "2",
                      "1", "2",
                      "", "",
                      "1", "2",
                      "1", "2",
                      "1", "2")
      names_files <- paste0("clean_", file_stems, use_days, file_tails, ".tex")

      library(lfe)
      library(stringr)

      for (i in seq_along(names_files)) {

        name_file_latex <- names_files[i]

        # Medians of the two distance variables; these lines only show output
        # when run interactively.
        cut[Variable == "distance_peak", .(p50)]
        cut[Variable == "distance_pur", .(p50)]

        # Pick the sub-sample and the row label that goes with file i.
        if (i == 1) {
          data.reg.used <- data_for_peaks[return_FTSE100_yest > 0]
          table_row <- "Return in $t-1>0$"
        } else if (i == 2) {
          data.reg.used <- data_for_peaks[return_FTSE100_yest < 0]
          table_row <- "Return in $t-1<0$"
        } else if (i == 3) {
          data.reg.used <- data_for_peaks[distance_peak <= p50_of("distance_peak")]
          table_row <- "Below Median"
        } else if (i == 4) {
          data.reg.used <- data_for_peaks[distance_peak > p50_of("distance_peak")]
          table_row <- "Above Median"
        } else if (i == 5) {
          data.reg.used <- data_for_peaks[distance_pur <= p50_of("distance_pur")]
          table_row <- "Below Median"
        } else if (i == 6) {
          data.reg.used <- data_for_peaks[distance_pur > p50_of("distance_pur")]
          table_row <- "Above Median"
        } else if (i == 7) {
          data.reg.used <- data_for_peaks[age <= p50_of("age")]
          table_row <- "Below Median"
        } else if (i == 8) {
          data.reg.used <- data_for_peaks[age > p50_of("age")]
          table_row <- "Above Median"
        } else if (i == 9) {
          data.reg.used <- data_for_peaks[female == 1]
          table_row <- "Female"
        } else if (i == 10) {
          data.reg.used <- data_for_peaks[female == 0]
          table_row <- "Male"
        } else if (i == 11) {
          data.reg.used <- data_for_peaks[portfolio_value_all_stocks <= p50_of("portfolio_value_all_stocks")]
          table_row <- "Below Median"
        } else if (i == 12) {
          data.reg.used <- data_for_peaks[portfolio_value_all_stocks > p50_of("portfolio_value_all_stocks")]
          table_row <- "Above Median"
        } else if (i == 13) {
          data.reg.used <- data_for_peaks[account_tenure_years_to2016 <= p50_of("account_tenure_years_to2016")]
          table_row <- "Below Median"
        } else if (i == 14) {
          data.reg.used <- data_for_peaks[account_tenure_years_to2016 > p50_of("account_tenure_years_to2016")]
          table_row <- "Above Median"
        } else if (i == 15) {
          data.reg.used <- data_for_peaks[Count_sedols <= p50_of("Count_sedols")]
          table_row <- "Below Median"
        } else if (i == 16) {
          data.reg.used <- data_for_peaks[Count_sedols > p50_of("Count_sedols")]
          table_row <- "Above Median"
        }

        # Joint specification (both gain dummies) on the selected sub-sample,
        # clustered by anon and port_date.
        summary(m3c <- felm(sell ~ gain.since.pur + gain.since.point | 0 | 0 | anon + port_date,
                            data = data.reg.used))

        table <- stargazer(
          m3c,
          title = "",
          align = TRUE,
          type = "latex",
          omit.table.layout = "n",
          float = FALSE,
          table.layout = "-dc#-tas-",
          style = "aer",
          column.sep.width = "10pt",
          digits = 4
        )

        # Collapse lines 9-16 of the stargazer output into a single LaTeX table
        # row. The order of the replacements matters: "gain.since.pur" is first
        # turned into the placeholder "AAA", which is swapped for the row label
        # only after the other cells have been cleaned.
        table <- str_replace_all(table, "\\^", "")
        table <- str_replace_all(table, "\\\\ ", "&")
        table <- str_replace_all(table, "\\\\&", "")
        table <- table[9:16]
        table <- str_replace_all(table, "gain.since.pur", "AAA")
        table <- str_replace_all(table, "gain.since.point &", "")
        table <- str_replace_all(table, "AAA: ", "")
        table <- str_replace_all(table, "Constant &", "")
        table <- paste(table, collapse = '')
        table <- paste(table, "\\\\")

        table <- str_replace_all(table, "AAA", table_row)

        setwd(wd_tables)

        write.table(table, file = name_file_latex,
                    col.names = FALSE, row.names = FALSE, quote = FALSE)

      }

    }
    
    
    #########################################################################################################
    # Cox Proportional Hazard Model with Time-Varying Covariates
    # ----------------------------------------------------------
    # In addition to linear probability models, we estimate a stratified Cox proportional hazard model 
    # with time-varying covariates. We follow Seru et al. (2010) by treating every purchase of a stock 
    # as the start of a new position. A position is considered to end on the date the investor first sells part or
    # all of their holdings.
    
    # Output:
    # -------
    # **Table A34: Cox Proportional Hazard Model Estimates of the Stocks Disposition Effect (Week-Peak)**
    # - `tables/cox_MAXC_peak_5update_sellsample.tex`
    #########################################################################################################
    
    # Switch for the Cox proportional hazard estimates; set to 1 to run them.
    regressions_cox_run <- 1
    # cox_model.R is sourced only for the data read from MAXC_peak_5update.csv.
    if (regressions_cox_run == 1 && data_name_read == "MAXC_peak_5update.csv") {

      setwd(wd_code)
      source("cox_model.R")
    }

    
    #########################################################################################################
    #
    # Regressions with Additional Controls
    # ------------------------------------
    # This section presents regression estimates with added controls, as shown in Table A33. 
    # The models control for:
    # - Holding period (days since purchase)
    # - Days since peak price
    # - Portfolio characteristics (value, number of stocks)
    # - Account characteristics (tenure)
    # - Individual characteristics (gender, age)
    
    # The regressions also include account and stock fixed effects.
    #
    # The primary analysis defines a peak price event using a one-week horizon, where the peak is the highest price 
    # during the holding period that remains for at least one week.
    
    # We repeat the analysis in alternative samples:
    # Table A25 repeats the analysis using a month-long peak price definition.
    # Table A26 repeats the analysis using login days instead of sell days.
    
    # Output Files:
    # -------------
    # **Table A33: Purchase and Peak Price Disposition Effects for Stocks (Including Portfolio and Demographic Controls)**
    # - `tables/full_control_MAXC_peak_5update_sell.tex`
    
    # **Table A25: Disposition Effect Estimates (Month-Peak Definition, Including Portfolio and Demographic Controls)**
    # - `tables/full_control_MAXC_peak_20update_sell.tex`
    
    # **Table A26: Disposition Effect Estimates (Login-Days, Including Portfolio and Demographic Controls)**
    # - `tables/full_control_MAXC_peak_5update_login.tex`
    #########################################################################################################
    
    # Switch for the regressions with additional controls; set to 1 to run them.
    regressions_long_run <- 1
    if (regressions_long_run == 1) {

      # NOTE: assigning a data.table with `<-` does not copy it, so the `:=`
      # calls below also add their columns to data_for_peaks.
      data.reg.used <- data_for_peaks

      # Reset the model objects so a stale fit cannot end up in the tables.
      a0c <- a1c <- a2c <- a3c <- a4c <- a5c <- a6c <- a7c <- a8c <- a9c <- NULL

      data.reg.used[, female := ifelse(gender == "F", 1, 0)]

      # Portfolio value in units of 10,000; observations below the 1st or above
      # the 99th percentile are flagged and left out of the estimation sample.
      data.reg.used[, port_value10000 := portfolio_value_all_stocks / 10000]
      extreme <- data.table(sum_up(data.reg.used[, .(port_value10000)], d = TRUE))
      data.reg.used[, outlierPF := 0]
      data.reg.used[port_value10000 < extreme$p1 | port_value10000 > extreme$p99, outlierPF := 1]

      # Rescaled controls (per 100 days, per 10 years, per 10 stocks).
      data.reg.used[, distance_pur100 := distance_pur / 100]
      data.reg.used[, distance_peak100 := distance_peak / 100]
      data.reg.used[, age10 := age / 10]
      data.reg.used[, Count_sedols10 := Count_sedols / 10]

      # All ten models use the same estimation sample, so the subset is built
      # once instead of once per felm() call.
      reg_sample <- data.reg.used[outlierPF == 0]

      # a0c-a7c add the controls one at a time (no fixed effects); a8c adds
      # account (anon) fixed effects and a9c account and stock (Code_used_DS)
      # fixed effects. All models cluster by anon and port_date.
      summary(a0c <- felm(sell ~ gain.since.pur + gain.since.point |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a1c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a2c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a3c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 +
                            port_value10000 |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a4c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 +
                            port_value10000 + Count_sedols10 |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a5c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 +
                            port_value10000 + Count_sedols10 + account_tenure_years_to2016 |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a6c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 +
                            port_value10000 + Count_sedols10 + account_tenure_years_to2016 + female |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a7c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 +
                            port_value10000 + Count_sedols10 + account_tenure_years_to2016 + female + age10 |
                            0 | 0 | anon + port_date,
                          data = reg_sample))
      summary(a8c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 +
                            port_value10000 + Count_sedols10 |
                            anon | 0 | anon + port_date,
                          data = reg_sample))
      summary(a9c <- felm(sell ~ gain.since.pur + gain.since.point + distance_pur100 + distance_peak100 +
                            port_value10000 + Count_sedols10 |
                            anon + Code_used_DS | 0 | anon + port_date,
                          data = reg_sample))

      # Covariate labels, in the order the covariates enter the models.
      important_labels_long <- c("Gain Since Purchase=1",
                                 "Gain Since Peak=1",
                                 "Days Since Purchase (100 days)",
                                 "Days Since Peak (100 days)",
                                 "Portfolio Value (\\pounds10000)",
                                 "Number of Stocks (10 stocks)",
                                 "Account Tenure (years)",
                                 "Female=1",
                                 "Age (10 years)")

      # Full table with all ten specifications.
      table <- stargazer(
        a0c, a1c, a2c, a3c, a4c, a5c, a6c, a7c, a8c, a9c,
        title = "", align = TRUE, type = "latex",
        dep.var.labels = c(""),
        covariate.labels = c(important_labels_long),
        omit.stat = c("LL", "ser", "f", "adj.rsq"),
        omit.table.layout = "n",
        float = FALSE,
        table.layout = "-dc#-tas-",
        style = "aer",
        no.space = TRUE,
        column.sep.width = "10pt",
        add.lines = list(c("Account FE", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "YES", "YES"),
                         c("Stock FE",   "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO", "YES")),
        digits = 4
      )

      library(stringr)
      # Strip every caret, restore the superscript in R$^{2}$, and write the
      # lines without the first 9 and the last 2 of stargazer's output.
      table <- str_replace_all(table, "\\^", "")
      table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

      setwd(wd_tables)
      write.table(table[10:(length(table) - 2)],
                  file = paste0("full_control_", plot_name, "_", use_days, ".tex"),
                  col.names = FALSE, row.names = FALSE, quote = FALSE)

      # Shorter version for the beamer slides: the last five specifications only.
      table <- stargazer(
        a5c, a6c, a7c, a8c, a9c,
        title = "", align = TRUE, type = "latex",
        dep.var.labels = c(""),
        covariate.labels = c(important_labels_long),
        omit.stat = c("LL", "ser", "f", "adj.rsq"),
        omit.table.layout = "n",
        float = FALSE,
        table.layout = "-dc#-tas-",
        style = "aer",
        no.space = TRUE,
        column.sep.width = "10pt",
        add.lines = list(c("Account FE", "NO", "NO", "NO", "YES", "YES"),
                         c("Stock FE",   "NO", "NO", "NO", "NO", "YES")),
        digits = 4
      )

      table <- str_replace_all(table, "\\^", "")
      table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

      setwd(wd_tables)
      write.table(table[10:(length(table) - 2)],
                  file = paste0("full_control_", plot_name, "_", use_days, "_beamer.tex"),
                  col.names = FALSE, row.names = FALSE, quote = FALSE)

      # Release the estimation sample once both tables are written.
      rm(reg_sample)

    }
  }
}  


#################################################################################################################
# Holding Analysis: Ownership and the Peak Price Effect
# ------------------------------------------------------
# This section examines how stock ownership influences the peak price effect. 
# For each investor-stock-day observation, we identify the peak price within the past year (the highest price 
# over the last 12 months). The sample is divided into:
# - Observations where the investor held the stock on the peak day
# - Observations where the investor did not hold the stock on the peak day

# Table 10: Ownership and the Peak Price Effect
# - Panel A: Results for investors who did not hold the stock on the peak price day.
# - Panel B: Results for investors who held the stock on the peak price day.

# The following lines in the code run this analysis on both the sell and login day samples, using either 
# the weekly or monthly peak definition. The paper presents results for the weekly definition and the sell sample for concreteness.

# Output Files:
# -------------
# **Table 10: Estimates of the Stocks Disposition Effect, Placebo Analysis**
# - `tables/clean_OLS_noholding_past_year_peak_5update_sellsample.tex`
# - `tables/clean_OLS_holding_past_year_peak_5update_sellsample.tex`
#################################################################################################################


# Input files for the holding analysis (past-year peak, 5- and 20-business-day
# update rule) and the two day samples that the loops below iterate over.
list_data_name_read <- paste0("peak_past_year_", c(5, 20), "update.csv")

list_days <- c("login", "sell")

for (data_number in 1:2){
  for (days_number in 1:2){
    
    # Pick the peak file and the day sample handled by this iteration.
    data_name_read <- list_data_name_read[data_number]
    use_days <- list_days[days_number]
    print(data_name_read)
    print(use_days)

    # Read the past-year peak file from wd_peak and parse the portfolio date.
    setwd(wd_peak)
    DT_peak_or_min <- fread(data_name_read)
    DT_peak_or_min[, port_date := as.Date(port_date, "%Y-%m-%d")]

    # Map the file-specific peak columns onto generic names so the rest of the
    # loop body is identical for both files; plot_name feeds the output file
    # names and period_name gates the Week-only steps further down.
    if (data_name_read == "peak_past_year_5update.csv") {
      setnames(
        DT_peak_or_min,
        old = c("app_past_peak_up5_year", "date_past_peak_up5_year", "pass_peak_up5_year"),
        new = c("app_past_point", "date_past_point", "pass_point_dummy")
      )

      plot_name <- "past_year_peak_5update"
      period_name <- "Week"
    } else if (data_name_read == "peak_past_year_20update.csv") {
      setnames(
        DT_peak_or_min,
        old = c("app_past_peak_up20_year", "date_past_peak_up20_year", "pass_peak_up20_year"),
        new = c("app_past_point", "date_past_point", "pass_point_dummy")
      )

      plot_name <- "past_year_peak_20update"
      period_name <- "Month"
    }

    # Left join: every account-stock-day row of the main panel is kept, and the
    # peak columns are NA where the peak file has no matching row.
    data_for_peaks <- merge(
      data.and.login.date.m,
      DT_peak_or_min[, .(Code_used_DS, port_date, anon, app_past_point,
                         date_past_point, pass_point_dummy)],
      by = c("anon", "Code_used_DS", "port_date"),
      all.x = TRUE
    )

    # Drop the reference to the raw peak table; it is not needed after the merge.
    DT_peak_or_min <- NULL

    # Number of distinct days per account with at least one sale or purchase.
    total_number_transactions <- unique(
      data_for_peaks[sell == 1 | buy == 1, .(port_date, anon)]
    )[, .(total_number_transactions = .N), by = .(anon)]

    # Number of distinct days per account that appear in the panel at all.
    total_number_logins <- unique(
      data_for_peaks[, .(port_date, anon)]
    )[, .(total_login = .N), by = .(anon)]

    # Transaction-days per panel-day. Only accounts with at least one
    # transaction-day are in `ratio`, so after the left join below the ratio
    # columns are NA for all other accounts.
    ratio <- merge(total_number_transactions, total_number_logins, by = "anon", all.x = TRUE)
    ratio[, ratio_tr_login := total_number_transactions / total_login]

    data_for_peaks <- merge(data_for_peaks, ratio, by = "anon", all.x = TRUE)
    
    # Parse the peak date and the purchase date as Date objects.
    data_for_peaks[, `:=`(
      date_past_point = as.Date(date_past_point, "%Y-%m-%d"),
      pur_day = as.Date(pur_day, "%Y-%m-%d")
    )]

    # Return of the current price (app) relative to the past-year peak price.
    # Defined only where a peak price is available; NA elsewhere.
    data_for_peaks[
      !is.na(app_past_point),
      return.since.point := (app - app_past_point) / app_past_point
    ]
    data_for_peaks[!is.na(app_past_point), `:=`(
      return.since.point100 = return.since.point * 100,
      gain.since.point = ifelse(return.since.point > 0, 1, 0),
      loss.since.point = ifelse(return.since.point < 0, 1, 0)
    )]

    # Positive and non-positive parts of the return since peak, in percent.
    data_for_peaks[!is.na(return.since.point), `:=`(
      return.since.point_pos100 = ifelse(return.since.point > 0, return.since.point * 100, 0),
      return.since.point_neg100 = ifelse(return.since.point <= 0, return.since.point * 100, 0)
    )]

    # holding = 1 when the stock was bought on or before the peak day, 0 when it
    # was bought afterwards; left NA when either date is missing.
    data_for_peaks[pur_day > date_past_point, holding := 0]
    data_for_peaks[pur_day <= date_past_point, holding := 1]

    # Rows at a loss since purchase but at a gain since the peak while the stock
    # was not held on the peak day; these rows are dropped later in the loop.
    data_for_peaks[
      gain.since.pur == 0 & gain.since.point == 1 & holding == 0,
      flag_no_peak := 1
    ]

    # Same return pattern, but for rows where the stock was held on the peak day.
    data_for_peaks[, flag_holding := 0]
    data_for_peaks[
      gain.since.pur == 0 & gain.since.point == 1 & holding == 1,
      flag_holding := 1
    ]

    # Share of flag_holding rows among the rows that will be kept. Inside the
    # loop the value is not printed; it is only visible when stepping through
    # the code interactively.
    data_for_peaks[is.na(flag_no_peak), mean(flag_holding)] # less than 1%

    # Number of quantile bins used for the percentile variables below.
    nbins <- 100

    # Percentile bin of the return since peak, and the mean return (in percent)
    # within each bin, for the rows that are kept (flag_no_peak not set).
    data_for_peaks[is.na(flag_no_peak), percentiles_point := xtile(return.since.point, n = nbins)]
    data_for_peaks[is.na(flag_no_peak), percentiles_mean_point := mean(return.since.point100), by = .(percentiles_point)]

    # Same construction for the return since purchase.
    data_for_peaks[is.na(flag_no_peak), percentiles := xtile(return.since.pur, n = nbins)]
    data_for_peaks[is.na(flag_no_peak), percentiles_mean := mean(return.since.pur100), by = .(percentiles)]

    # Inspection helper: lists the return columns (only shown when run interactively).
    grep("return", names(data_for_peaks), value = TRUE)

    # Positive and non-positive parts of the return since purchase, in percent.
    data_for_peaks[!is.na(return.since.pur), return.since.pur_pos100 := ifelse(return.since.pur > 0, return.since.pur * 100, 0)]
    data_for_peaks[!is.na(return.since.pur), return.since.pur_neg100 := ifelse(return.since.pur <= 0, return.since.pur * 100, 0)]

    # Drop the flagged rows. This filter used to be repeated a second time after
    # the summary statistics below; nothing modifies the table in between, so
    # the repeat was a redundant full copy of the data and has been removed.
    data_for_peaks <- data_for_peaks[is.na(flag_no_peak)]

    # Detailed summary statistics of every return column; the 1st and 99th
    # percentiles of the two percent returns are the trimming thresholds.
    return_sum <- sum_up(
      data_for_peaks[, grep("return", names(data_for_peaks), value = TRUE), with = FALSE],
      d = TRUE
    )

    cut.out.return <- as.data.table(return_sum)[Variable == "return.since.pur100", .(p1, p99)]
    cut.out.return.point <- as.data.table(return_sum)[Variable == "return.since.point100", .(p1, p99)]

    # A row is an outlier when either return lies outside its [p1, p99] range.
    data_for_peaks[, outlier := 0]
    data_for_peaks[return.since.pur100 > cut.out.return$p99, outlier := 1]
    data_for_peaks[return.since.pur100 < cut.out.return$p1, outlier := 1]

    data_for_peaks[return.since.point100 > cut.out.return.point$p99, outlier := 1]
    data_for_peaks[return.since.point100 < cut.out.return.point$p1, outlier := 1]

    # Share of trimmed rows (only shown when run interactively).
    data_for_peaks[, mean(outlier)]

    data_for_peaks <- data_for_peaks[outlier == 0]

    # Keep only rows where both returns are defined.
    data_for_peaks <- data_for_peaks[!is.na(return.since.point) & !is.na(return.since.pur)]
    
    label_table <- "Peak"

    # Accounts that still have at least one sell-day row after trimming.
    anon_in_sample_after_outliers <- data_for_peaks[num_sales_the_day > 0, .N, by = .(anon)]

    # Restrict both samples to those accounts.
    data_for_peaks <- data_for_peaks[anon %in% anon_in_sample_after_outliers$anon]

    # The sell sample additionally keeps only days with at least one sale.
    if (use_days == "sell") {
      data_for_peaks <- data_for_peaks[num_sales_the_day > 0]
    }

    # REGRESSIONS

    # Days elapsed since the purchase day, in days and in units of 100 days.
    data_for_peaks[, distance_pur := as.numeric(port_date - pur_day)]
    data_for_peaks[, distance_pur100 := distance_pur / 100]

    regressions_run=1
    if(regressions_run==1){
      
      library(lfe)
      library(stargazer)

      # All models are linear probability models of the sell indicator. The
      # felm formula parts are: covariates | fixed effects | IV | clusters, so
      # standard errors are clustered by account (anon) and date (port_date).
      # Assignments inside summary() keep the fitted models for the tables;
      # the summaries themselves are only displayed when run interactively.

      # ---- Investor did NOT hold the stock on the peak day (holding == 0) ----

      summary(m1c <- felm(
        sell ~ gain.since.pur | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 0]
      ))

      summary(m2c <- felm(
        sell ~ gain.since.point | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 0]
      ))

      summary(m3c <- felm(
        sell ~ gain.since.pur + gain.since.point | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 0]
      ))

      summary(m4c <- felm(
        sell ~ gain.since.pur + gain.since.point + distance_pur100 | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 0]
      ))

      # Account and stock fixed effects. This model is column 3 of the exported
      # table in EVERY iteration, so it must be estimated in every iteration.
      # It used to be estimated only when period_name == "Week", which made the
      # Month tables silently reuse the model fitted in an earlier Week
      # iteration (a different peak file and sample).
      summary(m5fec <- felm(
        sell ~ gain.since.pur + gain.since.point + distance_pur100 | anon + Code_used_DS | 0 | anon + port_date,
        data_for_peaks[holding == 0]
      ))

      # Account fixed-effect variants, estimated for the weekly definition only.
      if (period_name == "Week") {
        summary(m1fec <- felm(
          sell ~ gain.since.pur | anon | 0 | anon + port_date,
          data_for_peaks[holding == 0]
        ))

        summary(m2fec <- felm(
          sell ~ gain.since.point | anon | 0 | anon + port_date,
          data_for_peaks[holding == 0]
        ))

        summary(m3fec <- felm(
          sell ~ gain.since.pur + gain.since.point | anon | 0 | anon + port_date,
          data_for_peaks[holding == 0]
        ))

        summary(m4fec <- felm(
          sell ~ gain.since.pur + gain.since.point + distance_pur100 | anon | 0 | anon + port_date,
          data_for_peaks[holding == 0]
        ))
      }

      # ---- Investor held the stock on the peak day (holding == 1) ----

      summary(h1c <- felm(
        sell ~ gain.since.pur | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 1]
      ))

      summary(h2c <- felm(
        sell ~ gain.since.point | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 1]
      ))

      summary(h3c <- felm(
        sell ~ gain.since.pur + gain.since.point | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 1]
      ))

      summary(h4c <- felm(
        sell ~ gain.since.pur + gain.since.point + distance_pur100 | 0 | 0 | anon + port_date,
        data_for_peaks[holding == 1]
      ))

      # Same stale-model fix as for m5fec: estimated in every iteration because
      # the holding table uses it as column 3 in every iteration.
      summary(h5fec <- felm(
        sell ~ gain.since.pur + gain.since.point + distance_pur100 | anon + Code_used_DS | 0 | anon + port_date,
        data_for_peaks[holding == 1]
      ))

      if (period_name == "Week") {
        summary(h1fec <- felm(
          sell ~ gain.since.pur | anon | 0 | anon + port_date,
          data_for_peaks[holding == 1]
        ))

        summary(h2fec <- felm(
          sell ~ gain.since.point | anon | 0 | anon + port_date,
          data_for_peaks[holding == 1]
        ))

        summary(h3fec <- felm(
          sell ~ gain.since.pur + gain.since.point | anon | 0 | anon + port_date,
          data_for_peaks[holding == 1]
        ))

        summary(h4fec <- felm(
          sell ~ gain.since.pur + gain.since.point + distance_pur100 | anon | 0 | anon + port_date,
          data_for_peaks[holding == 1]
        ))
      }

      label_table <- "Peak"
      label_table_long <- "Gain Since Peak"

      # ---- No holding: investor did not hold the stock on the peak day ----

      # Row labels, in the order the covariates enter the models.
      important_labels <- c(
        "Gain Since Purchase=1",
        paste0(label_table_long, "=1"),
        "Days from Purchase Day (100 days)"
      )

      # Columns: no controls, plus holding period, plus account and stock FE.
      table <- stargazer(
        m3c, m4c, m5fec,
        title = "", align = TRUE, type = "latex",
        dep.var.labels = c(""),
        covariate.labels = important_labels,
        omit = NULL,
        omit.stat = c("LL", "ser", "f", "adj.rsq"),
        omit.table.layout = "n",
        float = FALSE,
        table.layout = "-dc#-tas-",
        style = "aer",
        column.sep.width = "10pt",
        add.lines = list(
          c("Account FE", "NO", "NO", "YES"),
          c("Stock FE", "NO", "NO", "YES")
        ),
        digits = 4
      )

      # Strip every caret from the LaTeX output, then restore the superscript
      # in the R-squared row.
      library(stringr)
      table <- str_replace_all(table, "\\^", "")
      table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

      # Write the table without its first 9 and last 2 lines of stargazer
      # output (presumably the surrounding tabular wrapper).
      setwd(wd_tables)
      write.table(
        table[10:(length(table) - 2)],
        file = paste0("clean_OLS_noholding_", plot_name, "_", use_days, "sample.tex"),
        col.names = FALSE, row.names = FALSE, quote = FALSE
      )

      # ---- Holding: investor held the stock on the peak day ----

      important_labels <- c(
        "Gain Since Purchase=1",
        paste0(label_table_long, "=1"),
        "Days from Purchase Day (100 days)"
      )

      table <- stargazer(
        h3c, h4c, h5fec,
        title = "", align = TRUE, type = "latex",
        dep.var.labels = c(""),
        covariate.labels = important_labels,
        omit = NULL,
        omit.stat = c("LL", "ser", "f", "adj.rsq"),
        omit.table.layout = "n",
        float = FALSE,
        table.layout = "-dc#-tas-",
        style = "aer",
        column.sep.width = "10pt",
        add.lines = list(
          c("Account FE", "NO", "NO", "YES"),
          c("Stock FE", "NO", "NO", "YES")
        ),
        digits = 4
      )

      library(stringr)
      table <- str_replace_all(table, "\\^", "")
      table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

      setwd(wd_tables)
      write.table(
        table[10:(length(table) - 2)],
        file = paste0("clean_OLS_holding_", plot_name, "_", use_days, "sample.tex"),
        col.names = FALSE, row.names = FALSE, quote = FALSE
      )
      
      # Beamer (slide) tables, produced for the weekly peak definition only.
      # Each table puts the no-holding and holding samples side by side.
      if (period_name == "Week") {

        # Slide table 1: gain since purchase only.
        table <- stargazer(
          m1c, h1c,
          title = "", align = TRUE, type = "latex",
          dep.var.labels = c(""),
          covariate.labels = important_labels[1],
          omit = NULL,
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          omit.table.layout = "n",
          float = FALSE,
          table.layout = "-dc#-tas-",
          style = "aer",
          column.sep.width = "10pt",
          digits = 4
        )

        # Remove all carets, then restore the superscript of R-squared.
        library(stringr)
        table <- str_replace_all(table, "\\^", "")
        table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

        # As for the paper tables, the first 9 and last 2 output lines are dropped.
        setwd(wd_tables)
        write.table(
          table[10:(length(table) - 2)],
          file = paste0("clean_OLS_holding1_", plot_name, "_", use_days, "sample_beamer.tex"),
          col.names = FALSE, row.names = FALSE, quote = FALSE
        )

        # Slide table 2: gain since peak only.
        table <- stargazer(
          m2c, h2c,
          title = "", align = TRUE, type = "latex",
          dep.var.labels = c(""),
          covariate.labels = important_labels[2],
          omit = NULL,
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          omit.table.layout = "n",
          float = FALSE,
          table.layout = "-dc#-tas-",
          style = "aer",
          column.sep.width = "10pt",
          digits = 4
        )

        library(stringr)
        table <- str_replace_all(table, "\\^", "")
        table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

        setwd(wd_tables)
        write.table(
          table[10:(length(table) - 2)],
          file = paste0("clean_OLS_holding2_", plot_name, "_", use_days, "sample_beamer.tex"),
          col.names = FALSE, row.names = FALSE, quote = FALSE
        )

        # Slide table 3: both gain indicators, without and with holding period.
        table <- stargazer(
          m3c, m4c, h3c, h4c,
          title = "", align = TRUE, type = "latex",
          dep.var.labels = c(""),
          covariate.labels = important_labels,
          omit = NULL,
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          omit.table.layout = "n",
          float = FALSE,
          table.layout = "-dc#-tas-",
          style = "aer",
          column.sep.width = "10pt",
          digits = 4
        )

        library(stringr)
        table <- str_replace_all(table, "\\^", "")
        table <- str_replace_all(table, "R\\$\\{2\\}\\$", "R$^{2}$")

        setwd(wd_tables)
        write.table(
          table[10:(length(table) - 2)],
          file = paste0("clean_OLS_holding3_", plot_name, "_", use_days, "sample_beamer.tex"),
          col.names = FALSE, row.names = FALSE, quote = FALSE
        )
      }
    }
    
    
    
    
    
    #################################################################################################################
    #
    # Extension of the Analysis:
    # --------------------------
    # 
    # Earlier, we analyzed the relationship between losses since peak and the likelihood of investors 
    # topping up their current positions. Panel B of Figure 5 in the paper shows that the probability of 
    # top-up increases as losses since peak price grow. This relationship is confirmed by regression 
    # analysis, which shows a negative coefficient for the loss since peak variable.
    # 
    # In the holding analysis, Table A42 extends this analysis by testing whether the negative slope 
    # observed in Panel B of Figure 5 differs for cases where the investor did not hold the stock during 
    # the peak price. However, the results in Table A42 show that the slopes are quantitatively similar 
    # for peaks occurring before the purchase of the stocks and peaks occurring after the purchase.
    # 
    # Output File:
    # ------------
    # **Table A42: Top-Up Behavior When Stocks Are in Loss Since Past Peak Price (Placebo Test, Peaks Defined for the Past Year)**
    # - `tables/clean_TOPUP2_returns_since_peak_past_year_peak_5update_loginsample.tex`
    #
    #################################################################################################################
    
    # Switch for the top-up placebo analysis; any value other than 1 skips it.
    regressions_run <- 1
    if (regressions_run == 1) {
      # The script is looked up in wd_code and evaluated in the calling
      # environment, so it sees the objects built in this loop iteration.
      setwd(wd_code)
      source("top_up_placebo.R")
    }
    
  }
}



##########################################################################################
# EXAMPLES: Illustrating the Concept of Peak Prices
# -------------------------------------------------
# We illustrate the concept of a peak price using examples for stocks.

# Figure 1: Peak Price Definition Using a 1-Week Horizon
# ------------------------------------------------------
# Panel B of Figure 1 shows a daily price series for a commonly purchased stock, with data ranging from 
# April 2013 to September 2014. The stock price is observed drifting upward. A peak price is defined using 
# a 1-week horizon, where each blue dot represents the highest price since the start of the period that 
# remains the highest for at least one week. For sensitivity analysis, the time horizon is extended to 
# 1 month, as shown in Figure A14.

# To generate these examples, we use the `finding_peak_MAX` function from the `cleaning_data3.R` script. 
# This function identifies the peak price since the investor purchased the stock. The `update_days` parameter 
# specifies the number of days a peak must remain the highest value for it to be considered a valid peak.

# The `finding_peak_MAX` function requires the following data: `buys_happenidng2` (spelled this way in the code below; a data table containing purchase dates),
# `dd` (a data table containing price data), and `list_anon` (a data table including all investor identifiers in the dataset).

# Output Files:
# -------------
# **Figure 1: Examples of Peak Prices (1-Week Definition)**
# - `figures/example_restriction_peaks_week.pdf`

# **Figure A14: Examples of Peak Prices (1-Month Definition)**
# - `figures/example_restriction_peaks_month.pdf`

# Figure 4: Holding vs. Not Holding on Peak Price Day
# ---------------------------------------------------
# Figure 4 provides examples of scenarios where investors held (Panel A) and did not hold (Panel B) 
# a stock on the peak price day:
# - Panel A: The investor purchased the stock before the peak day event, experiencing both a gain since 
#   purchase and a gain since the peak price day.
# - Panel B: The investor purchased the stock after the peak day event (which occurred approximately 
#   six months prior to the purchase), and also experienced gains since purchase and since the peak price day.

# Output Files:
# -------------
# **Figure 4: Stock Price Trajectories for the Placebo Analysis**
# - `figures/hold_gainpeak_gainpurch.pdf`
# - `figures/nohold_gainpeak_gainpurch.pdf`
##########################################################################################


# Daily price data, one row per stock (Code_used_DS) and date.
dd <- fread("D:/files_moved_19/output datastream/datastream data/310518_prices/datastream_20180601.csv")

dd[, date := as.Date(date, "%Y-%m-%d")]

# Sort by stock and date so that the lag below is the previous available row.
dd <- dd[order(Code_used_DS, date)]

# Previous-row values of app and upp within each stock.
dd[, `:=`(
  app_yest = shift(app, n = 1, fill = NA, type = "lag"),
  upp_yest = shift(upp, n = 1, fill = NA, type = "lag")
), by = .(Code_used_DS)]

# Keep only the columns used downstream.
dd <- dd[, .(app, app_yest, upp, upp_yest, af, date, Code_used_DS)] # note I do not have weekends


# Build the table of purchase events (one row per account, stock and purchase
# day). Object names, including the `buys_happenidng` spelling, are left
# untouched: finding_peak_MAX()/finding_peak() are called below without any data
# arguments, so they presumably read these tables from the global environment.
buys_happenidng <- data.and.login.date.m[, .(
  buy, anon, Code_used_DS, port_date, pur_day, qwapp,
  qwapp_bef, last_td, app,
  quantity, narrative
)]

# A change in qwapp relative to qwapp_bef marks a purchase on last_td, with
# qwapp as the new purchase price.
buys_happenidng[qwapp != qwapp_bef, `:=`(
  buy_happened = 1,
  buy_happened_day = last_td,
  new_purprice = qwapp
)]

# First purchase: no previous qwapp, and last_td is the purchase day itself.
buys_happenidng[is.na(qwapp_bef) & !is.na(qwapp) & last_td == pur_day, `:=`(
  buy_happened = 1,
  buy_happened_day = last_td,
  new_purprice = qwapp
)]

buys_happenidng <- buys_happenidng[!is.na(buy_happened_day)]

# Purchases recorded explicitly through a "BUY" narrative.
adding_buys <- unique(
  buys_happenidng[narrative == "BUY", .(anon, Code_used_DS, last_td, narrative, qwapp)]
)
setnames(
  adding_buys,
  old = c("qwapp", "last_td"),
  new = c("new_purprice2", "buy_happened_day")
)
adding_buys[, adding_buys := 1]
adding_buys[, narrative := NULL]
adding_buys[, .(anon, Code_used_DS, new_purprice2, buy_happened_day)]

# Full outer join of the two purchase sources on account, stock and day.
buys_happenidng2 <- merge(
  buys_happenidng, adding_buys,
  by = c("anon", "Code_used_DS", "buy_happened_day"),
  all = TRUE
)
adding_buys <- NULL
buys_happenidng <- NULL

# Inside the table, `adding_buys` refers to the marker column created above.
buys_happenidng2[adding_buys == 1 & is.na(buy_happened), buy_happened := 1]
buys_happenidng2 <- buys_happenidng2[buy_happened == 1]
buys_happenidng2[is.na(new_purprice), new_purprice := new_purprice2]
buys_happenidng2 <- unique(
  buys_happenidng2[, .(anon, Code_used_DS, buy_happened_day, new_purprice)]
)

# Keep a single row per account, stock and purchase day (the first one).
buys_happenidng2[, order := seq_along(new_purprice), by = .(anon, Code_used_DS, buy_happened_day)]
buys_happenidng2 <- buys_happenidng2[order == 1]

# All account identifiers in the main panel.
list_anon <- unique(data.and.login.date.m[, .(anon)])


# Stock shown in the example figures, and a numbered copy of the account list
# (order = position of the account in list_anon).
name_stock <- "GB00B01BP176"
list_anon_trial <- copy(list_anon)
list_anon_trial[, order := seq_along(anon)]

# Figure 1: Peak Price Definition Using a 1-Week Horizon

# Peaks for one example account (position 8425), updated after at least 5 days.
list_peak_anon_trial <- finding_peak_MAX(number_anon = 8425, show_merged = "NO", update_days = 5)
unique(list_peak_anon_trial[, .(Code_used_DS, date_past_peak_rest)])


library(cowplot)
library(scales)

# Distinct peaks of the example stock inside the plotting window; `top` is the
# peak date plus 7 days. The table is not used by the plot below.
segments <- unique(list_peak_anon_trial[
  Code_used_DS == name_stock & port_date > "2013-03-20" &
    port_date < "2014-09-01" & date_past_peak_rest > "2013-03-20",
  .(app_past_peak_rest, date_past_peak_rest)
])
segments[, top := date_past_peak_rest + 7]

# Price line of the example stock with one dot per distinct peak.
ggplot(
  list_peak_anon_trial[
    Code_used_DS == name_stock & port_date > "2013-03-20" & port_date < "2014-09-01"
  ],
  aes(x = port_date, y = appDS)
) +
  geom_line(colour = "dodgerblue3") +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == name_stock & port_date > "2013-03-20" &
        port_date < "2014-09-01" & date_past_peak_rest > "2013-03-20",
      .(app_past_peak_rest, date_past_peak_rest)
    ]),
    mapping = aes(x = date_past_peak_rest, y = app_past_peak_rest, colour = "any"),
    size = 4
  ) +
  scale_y_continuous("Prices (£)") +
  theme_bw() +
  theme_classic() +
  theme(
    text = element_text(size = 19, family = "serif"),
    legend.position = "none",
    legend.title.align = .5,
    legend.text = element_text(size = 19),
    panel.border = element_blank(),
    panel.grid = element_blank()
  ) +
  xlab("") +
  scale_color_manual(name = "", values = c("dodgerblue3")) +
  scale_x_date(labels = date_format("%Y-%m"), date_breaks = "2 month")

# ggsave() without a plot argument writes the most recent ggplot.
setwd(wd_figures)
ggsave("example_restriction_peaks_week.pdf", width = 10, height = 5)

# Figure A14: Examples of Peak Prices (1-Month Definition)

# Same example account as in Figure 1, with peaks updated after at least 20 days.
list_peak_anon_trial <- finding_peak_MAX(number_anon = 8425, show_merged = "NO", update_days = 20)

# Distinct peaks inside the plotting window; `top` is the peak date plus 28
# days. The table is not used by the plot below.
segments <- unique(list_peak_anon_trial[
  Code_used_DS == name_stock & port_date > "2013-03-20" &
    port_date < "2014-09-01" & date_past_peak_rest > "2013-03-20",
  .(app_past_peak_rest, date_past_peak_rest)
])
segments[, top := date_past_peak_rest + 28]

# Price line of the example stock with one dot per distinct peak; unlike the
# weekly figure, the legend is shown at the bottom.
ggplot(
  list_peak_anon_trial[
    Code_used_DS == name_stock & port_date > "2013-03-20" & port_date < "2014-09-01"
  ],
  aes(x = port_date, y = appDS)
) +
  geom_line(colour = "dodgerblue3") +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == name_stock & port_date > "2013-03-20" &
        port_date < "2014-09-01" & date_past_peak_rest > "2013-03-20",
      .(app_past_peak_rest, date_past_peak_rest)
    ]),
    mapping = aes(x = date_past_peak_rest, y = app_past_peak_rest, colour = "any"),
    size = 4
  ) +
  scale_y_continuous("Prices (£)") +
  theme_bw() +
  theme_classic() +
  theme(
    text = element_text(size = 19, family = "serif"),
    legend.position = "bottom",
    legend.title.align = .5,
    legend.text = element_text(size = 19),
    panel.border = element_blank(),
    panel.grid = element_blank()
  ) +
  xlab("") +
  scale_color_manual(
    name = "",
    values = c("dodgerblue3"),
    labels = c("Month-Peak Prices")
  ) +
  theme(legend.position = "bottom") +
  scale_x_date(labels = date_format("%Y-%m"), date_breaks = "2 month")

# ggsave() without a plot argument writes the most recent ggplot.
setwd(wd_figures)
ggsave("example_restriction_peaks_month.pdf", width = 10, height = 5)


# Figure 4: Holding vs. Not Holding on Peak Price Day

# NOTE(review): data_for_peaks is whatever the last iteration of the
# holding-analysis loop left behind, i.e. the 20-day-update peak file on the
# sell sample (and possibly further changed by top_up_placebo.R), while the
# Figure 4 panels call finding_peak() with update_days = 5. Confirm that this
# is the intended source for picking the example rows.

# Scenario markers: held / not held on the peak day, by sign of the returns.
data_for_peaks[
  holding == 1 & gain.since.pur == 0 & gain.since.point == 1,
  hold_gainpeak_losspurch := 1
]
data_for_peaks[
  holding == 1 & gain.since.pur == 1 & gain.since.point == 1,
  hold_gainpeak_gainpurch := 1
]
data_for_peaks[
  holding == 0 & gain.since.pur == 1 & is.na(flag_no_peak) & gain.since.point == 1,
  nohold_gainpeak_gainpurch := 1
]


# Day t from which the example rows are drawn, and the left edge of the plots.
potential_date_plot <- "2015-06-25"
min_date_plot <- "2014-01-01"

# Figure 4 (A) Holding Stock on Peak Day - Gain Since Purchase - Gain Since Peak

# Candidate rows on day t: stock held on the peak day, at a gain both since
# purchase and since the peak.
rows <- data_for_peaks[
  holding == 1 & gain.since.pur == 1 & gain.since.point == 1 &
    port_date == potential_date_plot,
  .(anon, Code_used_DS, app, app_past_point, qwapp, port_date, date_past_point, pur_day)
]
rows[, order := seq_along(app)]

# The example is the 87th candidate row; this depends on the row order of
# data_for_peaks.
anon_plot <- rows[87, anon]
code_plot <- rows[87, Code_used_DS]
pur_day_plot <- rows[87, pur_day]
date_peak_plot <- rows[87, date_past_point]
today_plot <- rows[87, port_date]


# Position of the example account in the numbered account list.
n.anon <- list_anon_trial[anon == anon_plot, order]

list_peak_anon_trial <- finding_peak(
  number_anon = n.anon, show_merged = "NO",
  update_days = 5, window_days = 262
)
unique(list_peak_anon_trial[Code_used_DS == code_plot, .(date_past_peak_rest)])

# Price line from min_date_plot to 60 days after day t, with dots on the peak
# day, the purchase day and day t, and a dashed vertical line at each of them.
# The labels sit at a fixed height (y = 2.15) on the price axis.
ggplot(
  list_peak_anon_trial[
    Code_used_DS == code_plot &
      port_date <= (today_plot + 60) & port_date >= min_date_plot
  ],
  aes(x = port_date, y = appDS)
) +
  geom_line(colour = "dodgerblue3") +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == code_plot & anon == anon_plot & date_past_peak == date_peak_plot,
      .(app_past_peak, date_past_peak)
    ]),
    aes(x = date_past_peak, y = app_past_peak),
    size = 2, colour = "dodgerblue3"
  ) +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == code_plot & anon == anon_plot & port_date == (pur_day_plot),
      .(appDS, port_date)
    ]),
    aes(x = port_date, y = appDS),
    size = 2, colour = "dodgerblue3"
  ) +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == code_plot & anon == anon_plot & port_date == (today_plot),
      .(appDS, port_date)
    ]),
    aes(x = port_date, y = appDS),
    size = 2, colour = "dodgerblue3"
  ) +
  geom_vline(xintercept = pur_day_plot, linetype = "dashed", color = "red", size = 0.5) +
  geom_vline(xintercept = date_peak_plot, linetype = "dashed", color = "red", size = 0.5) +
  geom_vline(xintercept = today_plot, linetype = "dashed", color = "red", size = 0.5) +
  coord_cartesian(clip = "off") +
  draw_label("\nPurchase day", x = pur_day_plot, y = 2.15, size = 15, fontfamily = "serif") +
  draw_label("\nPeak day", x = date_peak_plot, y = 2.15, size = 15, fontfamily = "serif") +
  draw_label("\nDay t", x = today_plot, y = 2.15, size = 15, fontfamily = "serif") +
  scale_y_continuous("Prices (£)") +
  theme_bw() +
  theme_classic() +
  theme(
    text = element_text(size = 19, family = "serif"),
    legend.position = "bottom",
    legend.title.align = .5,
    legend.text = element_text(size = 19),
    panel.border = element_blank(),
    panel.grid = element_blank()
  ) +
  xlab("") +
  scale_x_date(labels = date_format("%Y-%m"), date_breaks = "3 month")

# ggsave() without a plot argument writes the most recent ggplot.
setwd(wd_figures)
ggsave("hold_gainpeak_gainpurch.pdf", width = 10, height = 5)


# Figure 4 (B) Not Holding Stock on Peak Day - Gain Since Purchase - Gain Since Peak

# Candidate rows on day t: stock NOT held on the peak day, at a gain both since
# purchase and since the peak.
rows <- data_for_peaks[
  holding == 0 & gain.since.pur == 1 & is.na(flag_no_peak) &
    gain.since.point == 1 & port_date == potential_date_plot,
  .(anon, Code_used_DS, app, app_past_point, qwapp, qwapp_bef, port_date,
    date_past_point, pur_day, last_td)
]

rows[, order := seq_along(app)]

# The example is the 74th candidate row; this depends on the row order of
# data_for_peaks.
anon_plot <- rows[74, anon]
code_plot <- rows[74, Code_used_DS]
pur_day_plot <- rows[74, pur_day]
date_peak_plot <- rows[74, date_past_point]
today_plot <- rows[74, port_date]

# Position of the example account in the numbered account list.
n.anon <- list_anon_trial[anon == anon_plot, order]

list_peak_anon_trial <- finding_peak(
  number_anon = n.anon, show_merged = "NO",
  update_days = 5, window_days = 262
)


# Price line from min_date_plot to 60 days after day t, with dots on the peak
# day, the purchase day and day t, and a dashed vertical line at each of them.
# The labels sit at a fixed height (y = 2.15) on the price axis.
ggplot(
  list_peak_anon_trial[
    Code_used_DS == code_plot &
      port_date <= (today_plot + 60) & port_date >= min_date_plot
  ],
  aes(x = port_date, y = appDS)
) +
  geom_line(colour = "dodgerblue3") +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == code_plot & anon == anon_plot & date_past_peak == date_peak_plot,
      .(app_past_peak, date_past_peak)
    ]),
    aes(x = date_past_peak, y = app_past_peak),
    size = 2, colour = "dodgerblue3"
  ) +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == code_plot & anon == anon_plot & port_date == (pur_day_plot),
      .(appDS, port_date)
    ]),
    aes(x = port_date, y = appDS),
    size = 2, colour = "dodgerblue3"
  ) +
  geom_point(
    data = unique(list_peak_anon_trial[
      Code_used_DS == code_plot & anon == anon_plot & port_date == (today_plot),
      .(appDS, port_date)
    ]),
    aes(x = port_date, y = appDS),
    size = 2, colour = "dodgerblue3"
  ) +
  geom_vline(xintercept = pur_day_plot, linetype = "dashed", color = "red", size = 0.5) +
  geom_vline(xintercept = date_peak_plot, linetype = "dashed", color = "red", size = 0.5) +
  geom_vline(xintercept = today_plot, linetype = "dashed", color = "red", size = 0.5) +
  coord_cartesian(clip = "off") +
  draw_label("\nPurchase day", x = pur_day_plot, y = 2.15, size = 15, fontfamily = "serif") +
  draw_label("\nPeak day", x = date_peak_plot, y = 2.15, size = 15, fontfamily = "serif") +
  draw_label("\nDay t", x = today_plot, y = 2.15, size = 15, fontfamily = "serif") +
  scale_y_continuous("Prices (£)") +
  theme_bw() +
  theme_classic() +
  theme(
    text = element_text(size = 19, family = "serif"),
    legend.position = "bottom",
    legend.title.align = .5,
    legend.text = element_text(size = 19),
    panel.border = element_blank(),
    panel.grid = element_blank()
  ) +
  xlab("") +
  scale_x_date(labels = date_format("%Y-%m"), date_breaks = "3 month")

# ggsave() without a plot argument writes the most recent ggplot.
setwd(wd_figures)
ggsave("nohold_gainpeak_gainpurch.pdf", width = 10, height = 5)


